# -------------------------------------------------------------------
# Purpose: Creates Table B6
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Abort early unless both the input-data and the output directory exist
stopifnot(
  dir.exists(pdataanalysis),
  dir.exists(poutputappendix)
)


# Load the objects stored in the .RData file into the workspace; the rest of
# this script relies on the object `countyLevel19001940`
load(file = file.path(pdataanalysis, "countyLevel19001940.RData"))


# Compute correlations net of county FE, state-year FE, and population.
# For each variable in `vars`, both `entropy_namelast_mp_adjp_ws` and that
# variable are regressed on `sum_n_namelast_mp_adjp_ws` with the fixed effects
# absorbed; the correlation of the two residual vectors (rounded to 3 decimals)
# is stored under the variable's name.
vars <- c(
  "entropy_coo_adjp_ws", "entropy_cob_adjp_ws",
  "entropy_race_adjp_ws", "entropy_occ1950_adjp_ws"
)
cors <- numeric(length(vars))
names(cors) <- vars
for (i in seq_along(vars)) {
  # Restrict to rows complete on every column used below, so that both
  # regressions are estimated from the same input rows
  temp_data <- countyLevel19001940 %>%
    select(all_of(c(
      "entropy_namelast_mp_adjp_ws", vars[i], "sum_n_namelast_mp_adjp_ws",
      "gisjoin_1900_f", "statefip_f", "year_f"
    ))) %>%
    drop_na()

  resid1 <- feols(
    as.formula("entropy_namelast_mp_adjp_ws ~ sum_n_namelast_mp_adjp_ws | gisjoin_1900_f + statefip_f^year_f"),
    data = temp_data
  )$residuals
  resid2 <- feols(
    as.formula(paste0(vars[i], " ~ sum_n_namelast_mp_adjp_ws | gisjoin_1900_f + statefip_f^year_f")),
    data = temp_data
  )$residuals

  cors[i] <- round(cor(resid1, resid2), 3)
}


# Compute correlations net of state-year FE and population.
# Same residual-correlation procedure as for `cors`, but the fixed effects are
# state-by-year only.
temp <- numeric(length(vars))
names(temp) <- vars
for (i in seq_along(vars)) {
  # Restrict to rows complete on every selected column, so that both
  # regressions are estimated from the same input rows
  temp_data <- countyLevel19001940 %>%
    select(all_of(c(
      "entropy_namelast_mp_adjp_ws", vars[i], "sum_n_namelast_mp_adjp_ws",
      "gisjoin_1900_f", "statefip_f", "year_f"
    ))) %>%
    drop_na()

  resid1 <- feols(
    as.formula("entropy_namelast_mp_adjp_ws ~ sum_n_namelast_mp_adjp_ws | statefip_f^year_f"),
    data = temp_data
  )$residuals
  resid2 <- feols(
    as.formula(paste0(vars[i], " ~ sum_n_namelast_mp_adjp_ws | statefip_f^year_f")),
    data = temp_data
  )$residuals

  temp[i] <- round(cor(resid1, resid2), 3)
}
# Prepend this specification as the new first row of `cors`
cors <- rbind(temp, cors)


# Compute correlations net of year FE and population.
# Same residual-correlation procedure as for `cors`, but the only fixed
# effect is the year.
temp <- numeric(length(vars))
names(temp) <- vars
for (i in seq_along(vars)) {
  # Restrict to rows complete on every selected column, so that both
  # regressions are estimated from the same input rows
  temp_data <- countyLevel19001940 %>%
    select(all_of(c(
      "entropy_namelast_mp_adjp_ws", vars[i], "sum_n_namelast_mp_adjp_ws",
      "gisjoin_1900_f", "statefip_f", "year_f"
    ))) %>%
    drop_na()

  resid1 <- feols(
    as.formula("entropy_namelast_mp_adjp_ws ~ sum_n_namelast_mp_adjp_ws | year_f"),
    data = temp_data
  )$residuals
  resid2 <- feols(
    as.formula(paste0(vars[i], " ~ sum_n_namelast_mp_adjp_ws | year_f")),
    data = temp_data
  )$residuals

  temp[i] <- round(cor(resid1, resid2), 3)
}
# Prepend this specification as the new first row of `cors`
cors <- rbind(temp, cors)


# Compute correlations net of county FE, state-year FE (no population control).
# Both variables are regressed on a constant with the fixed effects absorbed;
# the correlation of the residuals (rounded to 3 decimals) is stored.
# `vars` is restated here with the same four variables as at the top of the
# script.
vars <- c(
  "entropy_coo_adjp_ws", "entropy_cob_adjp_ws",
  "entropy_race_adjp_ws", "entropy_occ1950_adjp_ws"
)
temp <- numeric(length(vars))
names(temp) <- vars
for (i in seq_along(vars)) {
  # Restrict to rows complete on every selected column, so that both
  # regressions are estimated from the same input rows
  temp_data <- countyLevel19001940 %>%
    select(all_of(c(
      "entropy_namelast_mp_adjp_ws", vars[i],
      "gisjoin_1900_f", "statefip_f", "year_f"
    ))) %>%
    drop_na()

  resid1 <- feols(
    as.formula("entropy_namelast_mp_adjp_ws ~ 1 | gisjoin_1900_f + statefip_f^year_f"),
    data = temp_data
  )$residuals
  resid2 <- feols(
    as.formula(paste0(vars[i], " ~ 1 | gisjoin_1900_f + statefip_f^year_f")),
    data = temp_data
  )$residuals

  temp[i] <- round(cor(resid1, resid2), 3)
}
# Prepend this specification as the new first row of `cors`
cors <- rbind(temp, cors)


# Compute correlations net of state-year FE (no population control).
# Both variables are regressed on a constant with state-by-year fixed effects
# absorbed; the correlation of the residuals (rounded to 3 decimals) is stored.
temp <- numeric(length(vars))
names(temp) <- vars
for (i in seq_along(vars)) {
  # Restrict to rows complete on every selected column, so that both
  # regressions are estimated from the same input rows
  temp_data <- countyLevel19001940 %>%
    select(all_of(c(
      "entropy_namelast_mp_adjp_ws", vars[i],
      "gisjoin_1900_f", "statefip_f", "year_f"
    ))) %>%
    drop_na()

  resid1 <- feols(
    as.formula("entropy_namelast_mp_adjp_ws ~ 1 | statefip_f^year_f"),
    data = temp_data
  )$residuals
  resid2 <- feols(
    as.formula(paste0(vars[i], " ~ 1 | statefip_f^year_f")),
    data = temp_data
  )$residuals

  temp[i] <- round(cor(resid1, resid2), 3)
}
# Prepend this specification as the new first row of `cors`
cors <- rbind(temp, cors)


# Compute correlations net of year FE (no population control).
# Both variables are regressed on a constant with year fixed effects absorbed;
# the correlation of the residuals (rounded to 3 decimals) is stored.
temp <- numeric(length(vars))
names(temp) <- vars
for (i in seq_along(vars)) {
  # Restrict to rows complete on every selected column, so that both
  # regressions are estimated from the same input rows
  temp_data <- countyLevel19001940 %>%
    select(all_of(c(
      "entropy_namelast_mp_adjp_ws", vars[i],
      "gisjoin_1900_f", "statefip_f", "year_f"
    ))) %>%
    drop_na()

  resid1 <- feols(
    as.formula("entropy_namelast_mp_adjp_ws ~ 1 | year_f"),
    data = temp_data
  )$residuals
  resid2 <- feols(
    as.formula(paste0(vars[i], " ~ 1 | year_f")),
    data = temp_data
  )$residuals

  temp[i] <- round(cor(resid1, resid2), 3)
}
# Prepend this specification as the new first row of `cors`
cors <- rbind(temp, cors)


# Create table: one labelled row per specification, one column per variable.
# The label order must follow the row order of `cors`, which was built by
# prepending each new specification on top of the earlier ones.
cors_tab <- bind_cols(
  tibble("Variable" = c(
    "Partial Corr. (Period FE)",
    "Partial Corr. (State-Period FE)",
    "Partial Corr. (County FE, State-Period FE)",
    "Partial Corr. (Period FE, Population)",
    "Partial Corr. (State-Period FE, Population)",
    "Partial Corr. (County FE, State-Period FE, Population)"
  )),
  as_tibble(cors)
)
# LaTeX column headers; the label column gets an empty header
names(cors_tab) <- c(
  "",
  "\\makecell{Ancestral-country\\\\diversity}",
  "\\makecell{Birth-country\\\\diversity}",
  "\\makecell{Racial\\\\diversity}",
  "\\makecell{Occupational\\\\diversity}"
)

# Write the table to the appendix output directory as LaTeX
tablename <- file.path(poutputappendix, "tableB06.tex")
# Have modelsummary emit numeric cells as plain text in LaTeX output
options("modelsummary_format_numeric_latex" = "plain")
datasummary_df(
  data = cors_tab,
  output = tablename,
  align = "lcccc",
  escape = FALSE # headers contain raw LaTeX (\makecell) and must not be escaped
)
# Project helpers defined outside this script; both are called on the written
# file. NOTE(review): presumably they post-process the .tex file in place --
# confirm against their definitions.
get_summary_stats_rows(tablename)
add_table_row(tablename, "toprule", "\\\\", where = "before")
cat("Table B6 saved to:", tablename, "\n")